# Long-format table used by the data-sharing plots below.
# Keeps the run/step identifiers and the sharing flag, stacks every column
# from `mean.grants.groups` through `sum..total.datasets..of.groups` into
# `name`/`value` pairs, drops rows containing any NA, and replaces the
# logical flag with a readable label.
pdata_sharing <- df %>%
  select(
    .run.number., .step., share.data.,
    mean.grants.groups:sum..total.datasets..of.groups
  ) %>%
  pivot_longer(
    cols = -c(.run.number., .step., share.data.),
    names_to = "name",
    values_to = "value"
  ) %>%
  drop_na() %>%
  mutate(
    share.data. = if_else(
      condition = share.data.,
      true = "Data sharing",
      false = "No data sharing"
    )
  )
# Smoothed trend of every "gini" metric over `.step.`, one facet per metric,
# coloured by sharing condition.
pdata_sharing %>%
  filter(str_detect(name, "gini")) %>%
  # Facet label: the part of the metric name before the first dot, title-cased.
  mutate(name = str_to_title(str_remove(name, "\\..*"))) %>%
  ggplot(aes(x = .step., y = value, colour = share.data.)) +
  # se = FALSE: the interval was not showing up anyway, so drop it
  # (which also removes it from the legend).
  geom_smooth(se = FALSE) +
  facet_wrap(~name, nrow = 2, scales = "free_y") +
  scale_colour_viridis_d(option = "C", begin = .1, end = .9, alpha = .7) +
  labs(x = "step", y = "Gini index", colour = NULL) +
  # Legend placed inside the plotting area at relative position (.8, .5).
  theme(legend.position = c(.8, .5))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Initially higher inequality with data sharing, but equals out over time.
# Interactive version of the Gini plot: one line per run (grouped by
# `.run.number.`), faceted by the raw metric name on a shared y scale,
# rendered through plotly.
p <- ggplot(
  filter(pdata_sharing, str_detect(name, "gini")),
  aes(x = .step., y = value, colour = share.data., group = .run.number.)
) +
  geom_line() +
  facet_wrap(~name, nrow = 2)
plotly::ggplotly(p)
# Distribution of the "total.primary" metric per sharing condition at
# step 500 (presumably the final step of each run -- confirm).
pdata_sharing %>%
  filter(
    str_detect(name, "total\\.primary"),
    .step. == 500
  ) %>%
  ggplot(aes(share.data., value, fill = share.data.)) +
  # Boxplot outliers are made invisible; the jittered points below already
  # draw every observation.
  geom_boxplot(
    notch = TRUE, show.legend = FALSE,
    width = .4, outlier.alpha = 0
  ) +
  # Jitter only along the categorical axis. Without `height = 0` the default
  # (40% of the data resolution) would also shift points along the value
  # axis, so they would no longer sit at their true values.
  geom_jitter(
    alpha = .35, aes(fill = NULL), show.legend = FALSE,
    width = .1, height = 0
  ) +
  scale_fill_viridis_d(option = "C", begin = .1, end = .9, alpha = .5) +
  labs(x = NULL, y = "# of total publications produced") +
  # Flip so the conditions are listed vertically and values run horizontally.
  coord_flip()